# Load packages
library(here)
library(dplyr)
library(readr)
library(rstan)
library(bayesrules)
library(tidyverse)
library(bayesplot)
library(rstanarm)
library(janitor)
library(tidybayes)
library(broom.mixed)
library(here)
library(sf)
library(tidycensus)
library(openxlsx)
library(s2)
# themes
theme_set(theme_minimal())
vari_names <- read_csv(here("clean_data", "nyc_names.csv"))
nyc_clean <- st_read(here("clean_data", "nyc_data.shp"), crs = 4269)
## Reading layer `nyc_data' from data source
## `/Users/freddy/Documents/GitHub/454/clean_data/nyc_data.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 223 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.04731 ymin: 40.55685 xmax: -73.70002 ymax: 40.91758
## Geodetic CRS: NAD83
colnames(nyc_clean) <- colnames(vari_names)
subway_stations <- st_read(here("ethnic","Data","stations", "geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp")) %>%
st_transform(., 4269)
## Reading layer `geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5' from data source
## `/Users/freddy/Documents/GitHub/454/ethnic/Data/stations/geo_export_85568705-efba-4456-bdc0-3d70ff2cf8e5.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 473 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -74.03088 ymin: 40.57603 xmax: -73.7554 ymax: 40.90313
## Geodetic CRS: WGS84(DD)
bus_stations <- st_read(here("ethnic","Data","bus", "bus_stops_nyc_may2020.shp")) %>%
st_transform(., 4269)
## Reading layer `bus_stops_nyc_may2020' from data source
## `/Users/freddy/Documents/GitHub/454/ethnic/Data/bus/bus_stops_nyc_may2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 14663 features and 6 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 914169.1 ymin: 122626.8 xmax: 1066982 ymax: 271696.8
## Projected CRS: NAD83 / New York Long Island (ftUS)
transit_points <- read_csv(here("transit","ridership_points.csv"))%>%
separate(Position, into=c("Point", "longitude", "latitude"), " ") %>%
mutate(latitude = str_remove_all(latitude, "[)]"),
longitude = str_remove_all(longitude, "[()]"),
) %>%
dplyr::select(-c(Point)) %>%
mutate(latitude = as.numeric(latitude),
longitude = as.numeric(longitude)) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4269)
#plot locations over map
subway_loc <- ggplot() +
geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
geom_sf(data = subway_stations, color="#3F123C", size=1) +
coord_sf(datum = st_crs(subway_stations)) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Subway Stop Locations \nin NYC")+
theme(#panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
bus_loc <- ggplot() +
geom_sf(data = nyc_clean, fill = "#EBF6FF", color = "#D48DD8", size = 0.15, alpha = .8) +
geom_sf(data = bus_stations, color="#3F123C", size=.5, alpha=.5) +
coord_sf(datum = st_crs(subway_stations)) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Bus Stop Locations \nin NYC")+
theme(#panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
stops <- nyc_clean %>%
ggplot() +
geom_sf(aes(fill = sub_count), color = "#8f98aa") +
scale_fill_gradient(low= "lavender", high = "maroon",
guide = guide_legend(title = "Number of Subway Stops") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Subway Stop Counts \nin NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
bus_stops <- nyc_clean %>%
ggplot() +
geom_sf(aes(fill = bus_count), color = "#8f98aa") +
scale_fill_gradient(low= "lavender", high = "maroon",
guide = guide_legend(title = "Number of Bus Stops") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Bus Stop Counts \nin NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
ridership <- nyc_clean%>%
ggplot() +
geom_sf(aes(fill = log2(mean_ridership)), color = "#8f98aa") +
scale_fill_gradient(low= "lavender", high = "maroon",
guide = guide_legend(title = "Log2 Mean Ridership") ,na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean (Log2) Subway Turnstile \nRidership in 2018 \nfor NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
access <- nyc_clean %>%
ggplot() +
geom_sf(aes(fill = perc_covered_by_transit), color = "#8f98aa") +
scale_fill_gradient(low= "lavender", high = "maroon",
guide = guide_legend(title = "Percent of Neighborhood within \n walking distance of a transit station"), na.value="#D6D6D6") +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Transit Accessibility \nin NYC")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 30, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
`
red <- ggplot(nyc_clean) +
geom_sf(aes(fill = below_poverty_line_count), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#E13728", guide = guide_legend(title = "Number Below \nPoverty Line")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Impoverishement")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
yellow <- ggplot(nyc_clean) +
geom_sf(aes(fill = mean_income), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#F3D24E", guide = guide_legend(title = "Mean Income")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean Income")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
teal <- ggplot(nyc_clean) +
geom_sf(aes(fill = mean_rent), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#2DBDC7", guide = guide_legend(title = "Dollars")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Mean Rent")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
purple <- ggplot(nyc_clean) +
geom_sf(aes(fill = eviction_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#7826C0", guide = guide_legend(title = "Number of Evictions")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Evictions")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
orange <- ggplot(nyc_clean) +
geom_sf(aes(fill = unemployment_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#FC9228", guide = guide_legend(title = "Number on \nUnemployment")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Unemployment")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
green <- ggplot(nyc_clean) +
geom_sf(aes(fill = store_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#326902", guide = guide_legend(title = "Number of Stores")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Retail Food Stores")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
blue <- ggplot(nyc_clean) +
geom_sf(aes(fill = school_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#5372C4",
guide = guide_legend(title = "Number of Schools")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Number of Schools")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
pink <- ggplot(nyc_clean) +
geom_sf(aes(fill = total_pop), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#F450E1", guide = guide_legend(title = "Number of People")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
brown <- ggplot(nyc_clean) +
geom_sf(aes(fill = uninsured_count), color = "#8f98aa")+
scale_fill_gradient(low = "#F8E3DD", high = "#6A4D39", guide = guide_legend(title = "Number of People \n without Insurance Coverage")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Insurance Coverage")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 26, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
white <- ggplot(nyc_clean) +
geom_sf(aes(fill = white_count), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#7B435B", guide = guide_legend(title = "Number White")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("White Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 24, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
black <- ggplot(nyc_clean) +
geom_sf(aes(fill = black_count), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#F25F5C", guide = guide_legend(title = "Number Black")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Black Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 24, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
asian <- ggplot(nyc_clean) +
geom_sf(aes(fill = asian_count), color = "#8f98aa") +
scale_fill_gradient(low = "#FCF5EE", high = "#717EC3", guide = guide_legend(title = "Number Asian")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Asian Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 24, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
latinx <- ggplot(nyc_clean) +
geom_sf(aes(fill = latinx_count), color = "#8f98aa")+
scale_fill_gradient(low = "#FCF5EE", high = "#FC9A38", guide = guide_legend(title = "Number Latinx")) +
theme_minimal() +
theme(panel.grid.major = element_line("transparent"),
axis.text = element_blank()) +
ggtitle("Latinx Population")+
theme(panel.grid.major = element_line("transparent"),
plot.title = element_text(size = 24, face = "bold"),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12)) +
guides(shape = guide_legend(override.aes = list(size = 8)),
color = guide_legend(override.aes = list(size = 8)))
All the data used in this project are from two major sources: the Tidycensus package and NYC Open Data.
Tidycensus is an R package interface, developed by Kyle Walker and Matt Herman, that enables easy access to the US Census Bureau’s data APIs and returns Tidyverse-ready data frames from various major US Census Bureau datasets. Our demographic and socioeconomic data are drawn from the American Community Survey results found in Tidycensus package. A summary of our ACS data variables is below:
total_pop: Total Population by Neighborhood
mean_income: Mean Income by Neighborhood
below_poverty_line_count: Number of People Living Below the 100% Poverty Line by Neighborhood
mean_rent: Mean Rent by Neighborhood
unemployment_count: Number of People on Unemployment by Neighborhood
latinx_count: Number of Latinx People by Neighborhood
white_count: Number of White People by Neighborhood
black_count: Number of Black People by Neighborhood
native_count: Number of Native People by Neighborhood
asian_count: Number of Asian People by Neighborhood
naturalized_citizen_count: Number of Naturalized Citizens by Neighborhood
noncitizen_count: Number of Foreign Born People by Neighborhood
uninsured_count: Number of Uninsured Citizens of any Age by NeighborhoodFor remaining predictors, we used NYC Open Data’s portal to identify specific predictors. In particular, we used geotagged locations of Subway Stops, Bus Stops, Grocery Stores, Schools, and Eviction Sites from the Departments of Transportation, Health, Education, and Housing to calculate neighborhood-specific variables described below:
school_count: Number of Public Schools by Neighborhoodeviction_count: Number of Evictions by Neighborhoodstore_count: Number of Grocery Stores and Food Vendors by Neighborhoodsub_count: Number of Subway Stations by Neighborhoodbus_count: Number of Bus Stations by Neighborhoodperc_covered_by_transit: Percent of Neighborhood within walking distance (.25 miles) of any transit stop.Lastly, we acquired subway ridership from Metropolitan Transportation Authority’s turnstile data for the week of September 7, 2019. For each station, entry/exit of each turnstile is recorded. Then, we aggregated this information by taking the station-specific average of subway ridership across the 7 days in the week. Finally, we geotagged each listed station, then took the mean of ridership at all subway stations in each neighborhood.
Our data has 242 observations of 23 variables. Below is a preview of our dataset with colnames attached.
library(kableExtra)
dim(nyc_clean)
## [1] 223 25
kable(head(nyc_clean)) %>% kable_styling()
| nta_id | total_pop | mean_income | below_poverty_line_count | below_poverty_line_and_50_count | mean_rent | unemployment_count | latinx_count | white_count | black_count | native_count | asian_count | naturalized_citizen_count | noncitizen_count | uninsured_count | school_count | eviction_count | store_count | sub_count | bus_count | mean_ridership | perc_covered_by_transit | transportation_desert_3cat | transportation_desert_4cat | geometry |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| BK0101 | 26308 | 98338.67 | 2397 | 1289 | 2062.667 | 582 | 3284 | 20526 | 482 | 40 | 1052 | 3777 | 3129 | 1797 | 6 | 68 | 71 | 2 | 53 | 9410.500 | 28.36395 | Satisfactory Access | Limited Access | MULTIPOLYGON (((-73.94074 4… |
| BK0102 | 57774 | 101238.92 | 9120 | 4474 | 2330.077 | 1710 | 18227 | 32237 | 1460 | 0 | 4008 | 6802 | 7746 | 3725 | 12 | 204 | 129 | 2 | 97 | 26603.000 | 55.26492 | Satisfactory Access | Satisfactory Access | MULTIPOLYGON (((-73.96355 4… |
| BK0103 | 36891 | 30309.25 | 18285 | 5970 | 1194.875 | 457 | 3351 | 31799 | 1288 | 20 | 194 | 3548 | 1012 | 711 | 6 | 45 | 58 | 3 | 35 | 6348.667 | 87.71022 | Satisfactory Access | Satisfactory Access | MULTIPOLYGON (((-73.96762 4… |
| BK0104 | 41861 | 78746.25 | 8406 | 3876 | 1801.667 | 1678 | 13602 | 17682 | 3960 | 210 | 5515 | 5770 | 5325 | 3040 | 15 | 157 | 117 | 6 | 77 | 8006.000 | 63.00854 | Satisfactory Access | Satisfactory Access | MULTIPOLYGON (((-73.95083 4… |
| BK0201 | 23758 | 140543.00 | 1504 | 585 | 2275.833 | 579 | 1517 | 17643 | 1330 | 44 | 2041 | 2082 | 1448 | 852 | 1 | 25 | 30 | 2 | 18 | 5275.000 | 98.46067 | Satisfactory Access | Satisfactory Access | MULTIPOLYGON (((-73.99066 4… |
| BK0202 | 24603 | 132850.00 | 3776 | 970 | 2413.875 | 1195 | 3772 | 13288 | 3369 | 0 | 2893 | 2151 | 2728 | 1244 | 17 | 111 | 73 | 8 | 104 | 19444.000 | 159.13596 | Excellent Access | Excellent Access | MULTIPOLYGON (((-73.99327 4… |
Below is a summary of each variable’s distribution.
summary(nyc_clean)
## nta_id total_pop mean_income below_poverty_line_count
## Length:223 Min. : 0 Min. : 23149 Min. : 0
## Class :character 1st Qu.:18106 1st Qu.: 50624 1st Qu.: 1544
## Mode :character Median :31876 Median : 67335 Median : 4180
## Mean :32357 Mean : 72094 Mean : 5905
## 3rd Qu.:47184 3rd Qu.: 87155 3rd Qu.: 8785
## Max. :97786 Max. :211822 Max. :28755
## NA's :42
## below_poverty_line_and_50_count mean_rent unemployment_count
## Min. : 0.0 Min. : 792 Min. : 0.0
## 1st Qu.: 830.5 1st Qu.:1321 1st Qu.: 423.5
## Median : 2693.0 Median :1510 Median : 921.0
## Mean : 3091.2 Mean :1603 Mean :1084.3
## 3rd Qu.: 4652.0 3rd Qu.:1746 3rd Qu.:1534.0
## Max. :13934.0 Max. :3279 Max. :4697.0
## NA's :42
## latinx_count white_count black_count native_count
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 1970 1st Qu.: 481 1st Qu.: 469 1st Qu.: 0.00
## Median : 5558 Median : 5010 Median : 2013 Median : 28.00
## Mean : 9660 Mean : 9872 Mean : 7283 Mean : 56.68
## 3rd Qu.:13914 3rd Qu.:14762 3rd Qu.: 9138 3rd Qu.: 72.00
## Max. :60712 Max. :69259 Max. :69697 Max. :601.00
##
## asian_count naturalized_citizen_count noncitizen_count uninsured_count
## Min. : 0.0 Min. : 0 Min. : 0 Min. : 0.0
## 1st Qu.: 265.5 1st Qu.: 2662 1st Qu.: 1554 1st Qu.: 704.5
## Median : 2363.0 Median : 6414 Median : 4625 Median : 1920.0
## Mean : 4530.3 Mean : 6930 Mean : 5237 Mean : 2452.1
## 3rd Qu.: 6054.5 3rd Qu.: 9768 3rd Qu.: 7435 3rd Qu.: 3376.0
## Max. :41314.0 Max. :31847 Max. :25008 Max. :12182.0
##
## school_count eviction_count store_count sub_count
## Min. : 1.000 Min. : 1.0 Min. : 1.00 Min. : 1.000
## 1st Qu.: 2.000 1st Qu.: 48.0 1st Qu.: 17.00 1st Qu.: 1.000
## Median : 5.000 Median : 148.0 Median : 45.00 Median : 1.000
## Mean : 6.924 Mean : 238.1 Mean : 51.98 Mean : 2.399
## 3rd Qu.: 9.500 3rd Qu.: 371.0 3rd Qu.: 76.50 3rd Qu.: 3.000
## Max. :31.000 Max. :1126.0 Max. :202.00 Max. :17.000
##
## bus_count mean_ridership perc_covered_by_transit
## Min. : 1.00 Min. : 273 Min. : 0.00000
## 1st Qu.: 30.00 1st Qu.: 4984 1st Qu.: 0.00836
## Median : 49.00 Median : 8006 Median : 33.95065
## Mean : 52.83 Mean : 12051 Mean : 45.42951
## 3rd Qu.: 68.00 3rd Qu.: 14205 3rd Qu.: 69.10730
## Max. :243.00 Max. :109922 Max. :254.34296
## NA's :100
## transportation_desert_3cat transportation_desert_4cat geometry
## Length:223 Length:223 MULTIPOLYGON :223
## Class :character Class :character epsg:4269 : 0
## Mode :character Mode :character +proj=long...: 0
##
##
##
##
library(egg)
ggarrange(subway_loc, bus_loc, stops, bus_stops, ridership, access, ncol=2)
ggarrange(red, orange, yellow, green, teal, blue, purple, pink, brown, ncol=3)
ggarrange(white, black, latinx, asian, ncol=2)
We fit ungrouped poisson and negative binomial models below:
poisson_non_hierarchical <- stan_glm(
white_count ~ total_pop + mean_income + mean_rent +
unemployment_count + sub_count +
perc_covered_by_transit + school_count +
store_count + bus_count +
eviction_count + uninsured_count,
data = nyc_clean,
family = poisson,
chains = 4, iter = 500*2, seed = 84735, refresh = 0
)
negbin_non_hierarchical <- stan_glm(
white_count ~ total_pop + mean_income + mean_rent +
unemployment_count + sub_count +
perc_covered_by_transit + school_count +
store_count + bus_count +
eviction_count + uninsured_count,
data = nyc_clean,
family = neg_binomial_2,
chains = 4, iter = 500*2, seed = 84735, refresh = 0
)
check_1 <- pp_check(poisson_non_hierarchical) +
xlab("White Resident Count") +
labs(title = "Poisson")+
xlim(0,100000)+
theme(plot.title = element_text(face="bold", size=25, hjust=.5))
check_2 <- pp_check(negbin_non_hierarchical) +
xlab("White Resident Count") +
labs(title = "Negative Binomial")+
xlim(0,100000)+
theme(plot.title = element_text(face="bold", size=25, hjust=.5))
ggpubr::ggarrange(check_1, check_2, ncol=2, common.legend=TRUE, legend = "right")
nyc_predict_clean <- nyc_clean %>%
na.omit()
set.seed(84735)
predictions_poisson <- posterior_predict(
poisson_non_hierarchical, newdata = nyc_predict_clean)
predictions_negbin <- posterior_predict(
poisson_non_hierarchical, newdata = nyc_predict_clean)
library(tidybayes)
library(bayesplot)
ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_poisson,
prob_outer = 0.95) +
ggplot2::scale_x_continuous(
labels = nyc_predict_clean$nta_id,
breaks = 1:nrow(nyc_predict_clean)) +
xaxis_text(angle = 90, hjust = 1) +
theme_linedraw()+
theme(panel.grid.major = element_line("transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank())+
coord_flip()
ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_negbin,
prob_outer = 0.95) +
ggplot2::scale_x_continuous(
labels = nyc_predict_clean$nta_id,
breaks = 1:nrow(nyc_predict_clean)) +
xaxis_text(angle = 90, hjust = 1) +
theme_linedraw()+
theme(panel.grid.major = element_line("transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank())+
coord_flip()
\[\begin{split} \text{White}_{ij} \mid \beta_{0j}, \beta_1, ..., \beta_k, \sigma_y & \sim \text{Pois}(\lambda_{ij}) \\ & \text{where} \log(\lambda_{ij}) = \beta_{0j} + \sum^{11}_{k=1}X_{ijk}\beta_k \\ \beta_{0j} \mid \beta_0, \sigma_0 & \stackrel{ind}{\sim} N(\beta_0, \sigma_0^2)\\ \beta_{0c}, \beta_k & \stackrel{ind}{\sim} N(0,1) \\ \sigma_y, \sigma_0 & \stackrel{ind}{\sim} Exp(1) \end{split}\]
poisson_hierarchical <- stan_glmer(
white_count ~ total_pop + mean_income + mean_rent +
unemployment_count + sub_count +
perc_covered_by_transit + school_count +
store_count + bus_count +
eviction_count + uninsured_count + (1 | nta_id),
data = nyc_clean,
family = poisson,
chains = 4, iter = 500*2, seed = 84735, refresh = 0
)
check_1 <- pp_check(poisson_hierarchical) +
xlab("White Resident Count") +
labs(title = "Poisson")+
xlim(0,100000)+
theme(plot.title = element_text(face="bold", size=25, hjust=.5))
library(kableExtra)
tidy(poisson_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95) %>%
mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100),
conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100),
conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
filter(conf.low > 0 & conf.high > 0 | conf.low < 0 & conf.high < 0) %>%
kable(align = "c", caption = "Hierarchicahl Poisson - Model Summary") %>%
kable_styling()
| term | estimate | std.error | conf.low | conf.high |
|---|---|---|---|---|
| (Intercept) | 1054.7347298 | 0.3992025 | 449.1941244 | 2356.6935076 |
| total_pop | 0.0071261 | 0.0000090 | 0.0051391 | 0.0089718 |
| eviction_count | -0.3148467 | 0.0005230 | -0.4155584 | -0.2091430 |
| uninsured_count | -0.0164431 | 0.0000572 | -0.0274059 | -0.0051331 |
nyc_predict_clean <- nyc_clean %>%
na.omit()
set.seed(84735)
predictions_poisson <- posterior_predict(
poisson_hierarchical, newdata = nyc_predict_clean)
library(tidybayes)
library(bayesplot)
ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_poisson,
prob_outer = 0.95) +
ggplot2::scale_x_continuous(
labels = nyc_predict_clean$nta_id,
breaks = 1:nrow(nyc_predict_clean)) +
xaxis_text(angle = 90, hjust = 1) +
theme_linedraw()+
theme(panel.grid.major = element_line("transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank())+
coord_flip()
\[\begin{split} \text{White}_{ij} \mid \beta_{0j}, \beta_1, ..., \beta_k, \sigma_y & \sim \text{NegBin}(\mu_{ij}, r) \\ & \text{where} \log(\mu_{ij}) = \beta_{0j} + \sum^{11}_{k=1}X_{ijk}\beta_k \\ \beta_{0j} \mid \beta_0, \sigma_0 & \stackrel{ind}{\sim} N(\beta_0, \sigma_0^2)\\ \beta_{0c}, \beta_k & \stackrel{ind}{\sim} N(0,1) \\ \sigma_y, \sigma_0 & \stackrel{ind}{\sim} Exp(1) \end{split}\]
negbin_hierarchical <- stan_glmer(
white_count ~ total_pop + mean_income + mean_rent +
unemployment_count + sub_count +
perc_covered_by_transit + school_count +
store_count + bus_count +
eviction_count + uninsured_count + (1 | nta_id),
data = nyc_clean,
family = neg_binomial_2,
chains = 4, iter = 500*2, seed = 84735, refresh = 0)
check_2 <- pp_check(negbin_hierarchical) +
xlab("White Resident Count") +
xlim(0,100000)+
labs(title = "Negative Binomial")+
theme(plot.title = element_text(face="bold", size=25, hjust=.5))
tidy(negbin_hierarchical, effects = "fixed", conf.int = TRUE, conf.level = 0.95)%>%
mutate(estimate= ifelse(term == "(Intercept)", exp(estimate), (exp(estimate)-1)*100),
conf.low= ifelse(term == "(Intercept)", exp(conf.low), (exp(conf.low)-1)*100),
conf.high = ifelse(term == "(Intercept)", exp(conf.high), (exp(conf.high)-1)*100))%>%
filter(conf.low > 0 & conf.high > 0 | conf.low < 0 & conf.high < 0) %>%
kable(align = "c", caption = "Hierarchichal Negative Binomial - Model Summary") %>%
kable_styling()
| term | estimate | std.error | conf.low | conf.high |
|---|---|---|---|---|
| (Intercept) | 2153.3432562 | 0.4124044 | 989.4476079 | 4798.3913302 |
| total_pop | 0.0062732 | 0.0000098 | 0.0045646 | 0.0081272 |
| eviction_count | -0.2574631 | 0.0004905 | -0.3540952 | -0.1618246 |
| uninsured_count | -0.0183541 | 0.0000552 | -0.0288213 | -0.0071493 |
nyc_predict_clean <- nyc_clean %>%
na.omit()
set.seed(84735)
predictions_negbin <- posterior_predict(
negbin_hierarchical, newdata = nyc_predict_clean)
ppc_intervals(nyc_predict_clean$white_count, yrep = predictions_negbin,
prob_outer = 0.95) +
ggplot2::scale_x_continuous(
labels = nyc_predict_clean$nta_id,
breaks = 1:nrow(nyc_predict_clean)) +
xaxis_text(angle = 90, hjust = 1) +
theme_linedraw()+
theme(panel.grid.major = element_line("transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank())+
coord_flip()